Guided section

library(tidyverse)
library(plotly)

1. Read data

# Load the cleaned gapminder table in a single pipeline.
# select(-1) discards the first column (presumably a row index written out
# with the CSV -- confirm against the file); the CO2 column then gets a short
# syntactic name so later code does not need backticks.
data <- read_csv('./gapminder_clean.csv') %>%
    select(-1) %>%
    rename(co2em = `CO2 emissions (metric tons per capita)`)

2. Scatter plot of CO2 emissions and GDP in 1962

# Complete (non-missing) GDP / CO2 pairs for the year 1962.
data1962 <- data %>%
    filter(Year == 1962) %>%
    select(gdpPercap, co2em) %>%
    drop_na()

# Scatter plot of the 1962 pairs; the aesthetics are declared once on the
# plot and inherited by the point layer.
ggplot(data1962, aes(x = gdpPercap, y = co2em)) +
    geom_point() +
    labs(x = "GDP per capita", y = "CO2 emissions per capita (metric tons)")

3. Pearson correlation of CO2 emissions and GDP

# Pearson product-moment correlation test on the 1962 GDP / CO2 pairs.
# The two argument expressions are kept exactly as written because cor.test()
# echoes them in the "data:" line of its printed result.
cor.test(
    data1962 %>% pull(gdpPercap),
    data1962 %>% pull(co2em),
    method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  data1962 %>% pull(gdpPercap) and data1962 %>% pull(co2em)
## t = 25.269, df = 106, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8934697 0.9489792
## sample estimates:
##       cor 
## 0.9260817

4. Year of strongest correlation between CO2 emissions and GDP

# Correlation between GDP per capita and CO2 emissions per capita, computed
# separately for each Year on rows where both values are present.
corrs <- data %>%
    select(Year, gdpPercap, co2em) %>%
    drop_na() %>%
    group_by(Year) %>%
    summarise(correlation = cor(gdpPercap, co2em))

# Pick the single row of `corrs` with the largest correlation and keep it as
# a named list, so that maxi$Year and maxi$correlation describe the same year.
# Taking max() of each column independently would instead pair the largest
# correlation with the latest Year in the data, whichever year the
# correlation actually came from.
# which.max() skips NA correlations and returns the first maximum on ties.
maxi <- as.list(corrs[which.max(corrs$correlation), ])

The coefficient is 0.9387918 in the year 2007.

5. Interactive scatter plot of CO2 emissions and GDP

# Rows for the year stored in maxi$Year, restricted to the columns the plot
# needs and to complete cases only.
max_em_year_data <- data %>%
    filter(Year == maxi$Year) %>%
    select(gdpPercap, co2em, pop, continent, `Country Name`) %>%
    drop_na()

# Bubble chart: position from GDP / CO2, bubble size from population, colour
# from continent. `text` is not a ggplot2 aesthetic (hence the warning echoed
# below); it is only carried along so ggplotly() can use it as the tooltip.
fig <- ggplot(max_em_year_data, aes(x = gdpPercap, y = co2em)) +
    geom_point(aes(
        size = pop,
        color = continent,
        text = paste("Country: ", `Country Name`,
             "\nGDP: ", gdpPercap,
             "\nCO2 emissions: ", co2em))) +
    labs(
        x = "GDP per capita",
        y = "CO2 emissions per capita (metric tons)",
        title = str_glue("GDP vs CO2 emissions per capita in ", maxi$Year))
## Warning: Ignoring unknown aesthetics: text
# Convert to an interactive plotly widget showing only the custom tooltip.
ggplotly(fig, tooltip = "text")

Open section

TODO